############################################################
# 3. SPATIAL ENRICHMENT
############################################################
crime2023_bins <- make_violence_bins(crime2023)
crime2024_bins <- make_violence_bins(crime2024)
crime_bins <- bind_rows(crime2023_bins, crime2024_bins) %>%
filter(!is.na(lng), !is.na(lat)) %>%
st_as_sf(coords = c("lng","lat"), crs = 4326, remove = FALSE) %>%
st_transform(CRS_TARGET)
crime_violent <- filter(crime_bins, violence_bin == "Violent")
crime_petty <- filter(crime_bins, violence_bin == "Misdemeanor/Non-violent")
# project parcels, ensure valid geom, attach an ID
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid() %>%
mutate(prop_id = row_number())
# create centroids for distance/buffer ops
props_centroids <- st_centroid(clean_data)[, "prop_id"]
# make ~1 block (~300ft) and ~2 blocks (~600ft) buffers
BLK <- get_block_dists(CRS_TARGET, one_block_ft = 300)
buf_1blk <- st_buffer(props_centroids, BLK$one)[, "prop_id"]
buf_2blk <- st_buffer(props_centroids, BLK$two)[, "prop_id"]
# track all IDs so every parcel gets a value
prop_id_tbl <- st_drop_geometry(props_centroids) %>% select(prop_id)
violent_counts <- st_join(buf_2blk, crime_violent, join = st_intersects, left = TRUE) %>%
st_drop_geometry() %>%
group_by(prop_id) %>%
summarise(violent_2blocks = sum(!is.na(lat)), .groups = "drop") %>%
right_join(prop_id_tbl, by = "prop_id") %>%
mutate(violent_2blocks = coalesce(violent_2blocks, 0L))
petty_counts <- st_join(buf_1blk, crime_petty, join = st_intersects, left = TRUE) %>%
st_drop_geometry() %>%
group_by(prop_id) %>%
summarise(petty_1block = sum(!is.na(lat)), .groups = "drop") %>%
right_join(prop_id_tbl, by = "prop_id") %>%
mutate(petty_1block = coalesce(petty_1block, 0L))
clean_data <- clean_data %>%
left_join(violent_counts, by = "prop_id") %>%
left_join(petty_counts, by = "prop_id")
### 3b. Neighborhood join
candidate_cols <- c(
"name","neighborhood","neighborhood_name","mapname",
"map_name","label","area_name","neigh_name","NAME","LABEL"
)
hit <- intersect(candidate_cols, names(neigh_raw))
NEIGH_NAME_COL <- if (length(hit) >= 1) hit[1] else {
stop("Couldn't find a neighborhood name column in neigh_raw.")
}
neigh <- neigh_raw %>%
st_make_valid() %>%
st_transform(CRS_TARGET) %>%
select(neigh_name = !!sym(NEIGH_NAME_COL))
props_ctr <- st_centroid(clean_data)[, "prop_id"]
props_with_neigh <- st_join(props_ctr, neigh, join = st_within, left = TRUE)
clean_data <- clean_data %>%
left_join(
st_drop_geometry(props_with_neigh) %>% select(prop_id, neigh_name),
by = "prop_id"
)
### 3c. Transit access
stops <- stops_raw %>%
st_make_valid() %>%
st_transform(CRS_TARGET) %>%
filter(!st_is_empty(geometry))
# recompute props_ctr just in case
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid() %>%
mutate(prop_id = coalesce(prop_id, row_number()))
props_ctr <- st_centroid(clean_data)[, "prop_id"]
nn_idx <- st_nearest_feature(props_ctr, stops)
nearest_stops <- stops[nn_idx, ]
dist_vec <- st_distance(props_ctr, nearest_stops, by_element = TRUE)
crs_units <- tryCatch(st_crs(CRS_TARGET)$units_gdal, error = function(e) NA_character_)
dist_stop_ft <- to_feet(dist_vec, crs_units)
cand <- c("stop_name","name","STOP_NAME","stop_id","id","STOP_ID")
hit <- intersect(cand, names(stops))
stop_id_col <- if (length(hit) >= 1) hit[1] else NA_character_
nearest_tbl <- st_drop_geometry(nearest_stops) %>%
mutate(
prop_id = props_ctr$prop_id,
dist_stop_ft = dist_stop_ft
) %>%
{ if (!is.na(stop_id_col)) {
select(., prop_id, dist_stop_ft,
nearest_stop = !!sym(stop_id_col))
} else {
select(., prop_id, dist_stop_ft)
}
}
clean_data <- clean_data %>%
left_join(nearest_tbl, by = "prop_id")
### 3d. Bike network access
# turn polygons into boundaries if needed (so distance-to-line makes sense)
types <- unique(st_geometry_type(bike_raw))
bike_geom <- st_geometry(bike_raw)
if (any(grepl("POLYGON", types))) {
bike_geom <- st_boundary(bike_geom)
}
bike <- bike_raw %>%
st_set_geometry(bike_geom) %>%
st_cast("MULTILINESTRING", warn = FALSE) %>%
st_make_valid() %>%
st_transform(CRS_TARGET) %>%
filter(!st_is_empty(geometry))
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid()
props_ctr <- st_centroid(clean_data)[, "prop_id"]
nn_idx <- st_nearest_feature(props_ctr, bike)
nearest_bike <- bike[nn_idx, ]
dist_vec <- st_distance(props_ctr, nearest_bike, by_element = TRUE)
dist_bike_ft <- to_feet(dist_vec, crs_units)
cand <- c("name","route","route_name","facility","type","street","NAME","ROUTE")
hit <- intersect(cand, names(bike))
bike_col <- if (length(hit) >= 1) hit[1] else NA_character_
nearest_tbl <- st_drop_geometry(nearest_bike) %>%
mutate(
prop_id = props_ctr$prop_id,
dist_bike_ft = dist_bike_ft
) %>%
{ if (!is.na(bike_col)) {
select(., prop_id, dist_bike_ft,
nearest_bike_feature = !!sym(bike_col))
} else {
select(., prop_id, dist_bike_ft)
}
}
clean_data <- clean_data %>%
left_join(nearest_tbl, by = "prop_id")
### 3e. Hospital access
hosp <- hosp_raw %>%
st_make_valid() %>%
st_transform(CRS_TARGET) %>%
filter(!st_is_empty(geometry))
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid()
props_ctr <- st_centroid(clean_data)[, "prop_id"]
nn_idx <- st_nearest_feature(props_ctr, hosp)
nearest_hosp <- hosp[nn_idx, ]
dist_vec <- st_distance(props_ctr, nearest_hosp, by_element = TRUE)
dist_hosp_mi <- to_miles(dist_vec, crs_units)
cand <- c("name","NAME","hospital","Hospital","FACILITY","facility",
"HOSPITAL","inst_name","INST_NAME","id","ID")
hit <- intersect(cand, names(hosp))
hosp_col <- if (length(hit) >= 1) hit[1] else NA_character_
nearest_tbl <- st_drop_geometry(nearest_hosp) %>%
mutate(
prop_id = props_ctr$prop_id,
dist_hosp_mi = dist_hosp_mi
) %>%
{ if (!is.na(hosp_col)) {
select(., prop_id, dist_hosp_mi,
nearest_hospital = !!sym(hosp_col))
} else {
select(., prop_id, dist_hosp_mi)
}
} %>%
mutate(
service_band_code = case_when(
dist_hosp_mi < 1 ~ 1L,
dist_hosp_mi >= 1 & dist_hosp_mi < 5 ~ 2L,
TRUE ~ 3L
),
service_band = factor(
service_band_code,
levels = c(1L, 2L, 3L),
labels = c("Too Close", "Within Service", "Out of Service"),
ordered = TRUE
)
)
clean_data <- clean_data %>%
left_join(nearest_tbl, by = "prop_id")
### 3f. Park access
# only polygon parks
is_poly <- st_geometry_type(parks_raw) %in% c("POLYGON","MULTIPOLYGON")
parks <- parks_raw[is_poly, ] %>%
st_make_valid() %>%
st_transform(CRS_TARGET) %>%
filter(!st_is_empty(geometry))
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid()
nn_idx <- st_nearest_feature(clean_data, parks)
nearest_parks <- parks[nn_idx, ]
dist_vec <- st_distance(clean_data, nearest_parks, by_element = TRUE)
units_gdal <- tryCatch(st_crs(CRS_TARGET)$units_gdal, error = function(e) NA_character_)
dist_park_ft <- to_feet(dist_vec, units_gdal)
dist_park_mi <- to_miles(dist_vec, units_gdal)
cand <- c("park_name","name","NAME","asset_name","ASSET_NAME",
"site_name","SITE_NAME","prop_name","PROP_NAME")
hit <- intersect(cand, names(parks))
park_col <- if (length(hit) >= 1) hit[1] else NA_character_
nearest_tbl <- st_drop_geometry(nearest_parks) %>%
mutate(
prop_id = clean_data$prop_id,
dist_park_ft = dist_park_ft,
dist_park_mi = dist_park_mi
) %>%
{ if (!is.na(park_col)) {
select(., prop_id, dist_park_ft, dist_park_mi,
nearest_park = !!sym(park_col))
} else {
select(., prop_id, dist_park_ft, dist_park_mi)
}
}
clean_data <- clean_data %>%
left_join(nearest_tbl, by = "prop_id")
### 3g. School access
schools <- schools_in %>%
st_make_valid() %>%
mutate(type_norm = norm_school_type(.data[["type_specific"]])) %>%
filter(!is.na(type_norm))
# if polygons, collapse to point-on-surface
if (any(st_geometry_type(schools) %in% c("POLYGON","MULTIPOLYGON"))) {
st_geometry(schools) <- st_point_on_surface(st_geometry(schools))
}
schools <- schools %>%
st_transform(CRS_TARGET) %>%
filter(!st_is_empty(geometry))
sch_pub <- filter(schools, type_norm == "public")
sch_char <- filter(schools, type_norm == "charter")
sch_priv <- filter(schools, type_norm == "private")
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid()
props_ctr <- st_centroid(clean_data)[, "prop_id"]
units_gdal <- tryCatch(st_crs(CRS_TARGET)$units_gdal, error = function(e) NA_character_)
dist_school_public_ft <- nearest_dist_ft(props_ctr, sch_pub, units_gdal)
dist_school_charter_ft <- nearest_dist_ft(props_ctr, sch_char, units_gdal)
dist_school_private_ft <- nearest_dist_ft(props_ctr, sch_priv, units_gdal)
dist_tbl <- st_drop_geometry(props_ctr) %>%
transmute(
prop_id,
dist_school_public_ft = dist_school_public_ft,
dist_school_charter_ft = dist_school_charter_ft,
dist_school_private_ft = dist_school_private_ft
)
clean_data <- clean_data %>%
left_join(dist_tbl, by = "prop_id")
### 3h. Food access
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid()
props_ctr <- st_centroid(clean_data)[, "prop_id"]
food_proc <- food_raw %>%
st_make_valid()
if (any(st_geometry_type(food_proc) %in% c("POLYGON","MULTIPOLYGON"))) {
st_geometry(food_proc) <- st_point_on_surface(st_geometry(food_proc))
}
food_proc <- food_proc %>%
st_transform(CRS_TARGET) %>%
filter(!st_is_empty(geometry))
if (nrow(food_proc) == 0L) {
clean_data <- clean_data %>%
mutate(
dist_foodretail_ft = NA_real_,
foodretail_1mi_count = 0L
)
} else {
units_gdal <- tryCatch(st_crs(CRS_TARGET)$units_gdal, error = function(e) NA_character_)
one_mile <- if (isTRUE(units_gdal %in% c("metre","m"))) 1609.344 else 5280
nn_idx <- st_nearest_feature(props_ctr, food_proc)
nearest_food <- food_proc[nn_idx, ]
dist_vec <- st_distance(props_ctr, nearest_food, by_element = TRUE)
dist_foodretail_ft <- to_feet(dist_vec, units_gdal)
buf_1mi <- st_buffer(props_ctr, one_mile)[, "prop_id"]
cnt_tbl <- st_join(buf_1mi, food_proc, join = st_intersects, left = TRUE) %>%
st_drop_geometry() %>%
count(prop_id, name = "foodretail_1mi_count")
dist_tbl <- st_drop_geometry(props_ctr) %>%
transmute(prop_id, dist_foodretail_ft = dist_foodretail_ft)
clean_data <- clean_data %>%
left_join(dist_tbl, by = "prop_id") %>%
left_join(cnt_tbl, by = "prop_id") %>%
mutate(foodretail_1mi_count = coalesce(foodretail_1mi_count, 0L))
}
### 3i. Police access
pol_proc <- pol_raw %>%
st_make_valid()
if (any(st_geometry_type(pol_proc) %in% c("POLYGON","MULTIPOLYGON"))) {
st_geometry(pol_proc) <- st_point_on_surface(st_geometry(pol_proc))
}
pol_proc <- pol_proc %>%
st_transform(CRS_TARGET) %>%
filter(!st_is_empty(geometry))
if (nrow(pol_proc) == 0L) {
clean_data <- clean_data %>%
mutate(dist_police_ft = NA_real_)
} else {
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid()
props_ctr <- st_centroid(clean_data)[, "prop_id"]
nn_idx <- st_nearest_feature(props_ctr, pol_proc)
nearest_pol <- pol_proc[nn_idx, ]
dist_vec <- st_distance(props_ctr, nearest_pol, by_element = TRUE)
units_gdal <- tryCatch(st_crs(CRS_TARGET)$units_gdal, error = function(e) NA_character_)
dist_police_ft <- to_feet(dist_vec, units_gdal)
cand <- c("name","station","precinct","district","NAME","STATION",
"DISTRICT","FACILITY","facility","id","ID")
hit <- intersect(cand, names(pol_proc))
pol_col <- if (length(hit) >= 1) hit[1] else NA_character_
nearest_tbl <- st_drop_geometry(nearest_pol) %>%
mutate(
prop_id = props_ctr$prop_id,
dist_police_ft = dist_police_ft
) %>%
{ if (!is.na(pol_col)) {
select(., prop_id, dist_police_ft,
nearest_police = !!sym(pol_col))
} else {
select(., prop_id, dist_police_ft)
}
}
clean_data <- clean_data %>%
left_join(nearest_tbl, by = "prop_id")
}
### 3j. Fire station access
fire_proc <- fire_raw %>%
st_make_valid()
if (any(st_geometry_type(fire_proc) %in% c("POLYGON","MULTIPOLYGON"))) {
st_geometry(fire_proc) <- st_point_on_surface(st_geometry(fire_proc))
}
fire_proc <- fire_proc %>%
st_transform(CRS_TARGET) %>%
filter(!st_is_empty(geometry))
if (nrow(fire_proc) == 0L) {
clean_data <- clean_data %>%
mutate(dist_fire_ft = NA_real_)
} else {
clean_data <- clean_data %>%
st_transform(CRS_TARGET) %>%
st_make_valid()
props_ctr <- st_centroid(clean_data)[, "prop_id"]
nn_idx <- st_nearest_feature(props_ctr, fire_proc)
nearest_fire <- fire_proc[nn_idx, ]
dist_vec <- st_distance(props_ctr, nearest_fire, by_element = TRUE)
units_gdal <- tryCatch(st_crs(CRS_TARGET)$units_gdal, error = function(e) NA_character_)
dist_fire_ft <- to_feet(dist_vec, units_gdal)
cand <- c("name","station","facility","FACILITY","STATION",
"company","COMPANY","house","HOUSE","id","ID")
hit <- intersect(cand, names(fire_proc))
fire_col <- if (length(hit) >= 1) hit[1] else NA_character_
nearest_tbl <- st_drop_geometry(nearest_fire) %>%
mutate(
prop_id = props_ctr$prop_id,
dist_fire_ft = dist_fire_ft
) %>%
{ if (!is.na(fire_col)) {
select(., prop_id, dist_fire_ft,
nearest_fire = !!sym(fire_col))
} else {
select(., prop_id, dist_fire_ft)
}
}
clean_data <- clean_data %>%
left_join(nearest_tbl, by = "prop_id")
}